Gradienten er
\(\begin{bmatrix} \frac{1}{n} \sum_i^n 2(a + bs_i - d_i) \\ \frac{1}{n} \sum_i^n 2(a + bs_i -d_i)s_i \end{bmatrix}\)
ab <- c(-20,3)
n <- 50
ms <- function(ab) {
ab[1] + ab[2] * cars$speed
}
f_i <- function(ab) {
(ms(ab) - cars$dist)^2
}
f <- function(ab) {
1/n * sum(f_i(ab))
}
d_f <- function(ab) {
grad(f,ab)
}
dd_f <- function(ab) {
hessian(f,ab)
}
backtrack <- function(a_bar = 3, rho = 0.3, c = 0.2, func = f, Dfunc = d_f, x_k = ab, c_2 = 0.5) {
a <- a_bar
itt <- 0
while ( (func(x_k + a * (-Dfunc(x_k)) ) ) > (func(x_k) + c * a * t(Dfunc(x_k)) %*% (-Dfunc(x_k))) ) {
# while (t(Dfunc(x_k + a * -Dfunc(x_k))) %*% -Dfunc(x_k) < c_2 * t(Dfunc(x_k)) %*% -Dfunc(x_k) ) {
itt <- itt + 1
a <- rho * a
# }
}
a
}
#steepest descent
optimer_identitet <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k))) > 1e-5) {
itt <- 1 + itt
p_k <- -d_f(x_k)
a_k <- backtrack(a_k, rho = 0.5, c = 0.001, f, d_f, x_k)
x_k <- x_k + a_k * t(p_k)
steps[itt] <- a_k
if (itt %% 500 == 0) {
abline(x_k)
}
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet()
#newton
optimer_newton <- function(x_0 = ab, output = TRUE) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
x_plot <- c()
y_plot <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k))) > 1e-4) {
itt <- 1 + itt
p_k <- solve(dd_f(x_k), -d_f(x_k))
a_k <- backtrack(a_k, rho = 0.5, c = 0.01, f, d_f, x_k)
x_k <- x_k + a_k * t(p_k)
if (itt %% 50 == 0) {
abline(x_k)
x_plot[itt/50] <- x_k[1]
y_plot[itt/50] <- x_k[2]
}
steps[itt] <- a_k
if (itt == 100000) {
break
}
}
if (output == TRUE) {
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
else
cbind(x_plot, y_plot)
}
xy <- optimer_newton(output = FALSE)
optimer_newton(output = TRUE)
## x* = -17.5791 3.932409 f(x*) = 227.0704 iteration = 7956 steps = 0.001953125 0.001953125 0.001953125 0.001953125 0.001953125
ms_plot <- function(x,y) {
x + y * cars$speed
}
f_i_plot <- function(x,y) {
(ms_plot(x,y) - cars$dist)^2
}
f_plot <- function(x,y) {
1/n * sum(f_i_plot(x,y))
}
fkts_vaerdier <- apply(xy, 1, f)
# x <- seq(-20,-17,length = 200)
# y <- seq(2.9,4.2, length = 200)
x <- xy[,1]
y <- xy[,2]
z <- outer(x, y, FUN = Vectorize("f_plot"))
plot_ly(x = x, y = y, z = ~z) %>% add_surface() %>%
add_trace(x = xy[,1], y = xy[,2], z = fkts_vaerdier)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
summary(lm(cars$dist ~ cars$speed))
##
## Call:
## lm(formula = cars$dist ~ cars$speed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## cars$speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
optim(ab, f, hessian = TRUE)
## $par
## [1] -17.583175 3.932699
##
## $value
## [1] 227.0704
##
## $counts
## function gradient
## 53 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## [,1] [,2]
## [1,] 2.0 30.80
## [2,] 30.8 529.12
phi_lig_nul <- function(x_k) {
1/n * sum((x_k[2] * cars$speed + x_k[1] - cars$dist) /(d_f(x_k)[2] * cars$speed + d_f(x_k)[1]))
}
optimer_identitet_phi <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k))) > 1e-5) {
itt <- 1 + itt
p_k <- -d_f(x_k)
a_k <- phi_lig_nul(x_k)
x_k <- x_k + a_k * t(p_k)
steps[itt] <- a_k
if (itt %% 1 == 0) {
abline(x_k)
}
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_phi()
optimer_newton_phi <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
x_plot <- c()
f_plot <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k))) > 1e-5) {
itt <- 1 + itt
p_k <- solve(dd_f(x_k), -d_f(x_k))
a_k <- phi_lig_nul(x_k)
x_k <- x_k + a_k * t(p_k)
if (itt %% 50 == 0) {
abline(x_k)
}
steps[itt] <- a_k
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
optimer_newton_phi()
## x* = -17.45905 3.978643 f(x*) = 227.8213 iteration = 1e+05 steps = 0.002058539 0.00205874 0.002058941 0.002059142 0.002059344